leaguedf <- read_csv('../../data_sets/S13LeagueOfLegendsData.csv',
col_types=c('c', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'c'),
col_names=c('rowno', 'Name', 'Class', 'Role', 'Tier', 'Score', 'Trend', "WinRate", "RoleRate", "PickRate", "BanRate", 'KDA', 'Patch'), skip=1) %>%
column_to_rownames("rowno") %>%
mutate(PickBanRate = PickRate + BanRate, Patch = as.numeric(str_replace(Patch, '(.*?)_(.*?)', ''))) %>%
select(-c(PickRate, BanRate))
stats <- leaguedf %>% group_by(Name) %>%
summarize(sdWinRate = sd(WinRate), sdPickBanRate = sd(PickBanRate))
leaguedf <- inner_join(stats, leaguedf, 'Name')
leaguedfRole <- leaguedf %>%
mutate(Role = case_when(Role == "ADC" ~ "Attack Damage Carry", TRUE ~ str_to_title(Role)))
#SD by patch
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
leaguedfRole %>%
group_by(Name) %>% filter(Class != "NULL") %>%
ggplot() + geom_point(mapping=aes(x=sdWinRate, y=sdPickBanRate, color = Class)) + facet_wrap(~ Role) + xlab("Standard Deviation of Win Rate") + ylab("Standard Deviation of Pick Ban Rate") + ggtitle("Volatility of Champions")
#Win rate and Pick Ban Rate over time seperated by Role
PatchRoleStats <- leaguedfRole %>% filter(Role != "Jungle") %>%
group_by(Role, Patch) %>%
summarize(meanWinRate = mean(WinRate), meanPBRate = mean(PickBanRate),.groups='keep')
plot1a <- leaguedfRole %>% filter(Role != "Jungle") %>%
ggplot() + geom_line(mapping=aes(x=Patch, y=WinRate, color=Name, alpha=0.001)) +
geom_line(data = PatchRoleStats, mapping = aes(x=Patch, y=meanWinRate), color="black")+
theme(legend.position="none") + facet_wrap( ~ Role) + ggtitle("Win Rate over Time seperated by Role") + xlab('') + ylab("")
plot1b <- leaguedfRole %>% filter(Role != "Jungle") %>%
ggplot() +
geom_line(mapping=aes(x=Patch, y=PickBanRate, color=Name, alpha=0.001)) +
geom_line(data = PatchRoleStats, mapping=aes(x=Patch, y=meanPBRate), color="black") +
theme(legend.position="above") + facet_wrap( ~ Role) + ggtitle("PBRate over Time Seperated by Role") + xlab('') + ylab("")
#Win Rate and Pick Ban Rate over Time for just Heimerdinger seperated by Role
plot2a <- leaguedfRole %>% filter(Name == "Heimerdinger") %>%
ggplot() + geom_line(mapping = aes(x = Patch, y = WinRate, alpha=0.5), color="green") +
geom_line(data = PatchRoleStats, mapping=aes(x=Patch, y=meanWinRate), color="black") +
facet_wrap(~ Role) + theme(legend.position="none") + xlab("Patch") + ylab("")
plot2b <- leaguedfRole %>% filter(Name == "Heimerdinger") %>%
ggplot() + geom_line(mapping = aes(x = Patch, y = PickBanRate), color="green") +
geom_line(data = PatchRoleStats, mapping=aes(x=Patch, y=meanPBRate), color="black") +
facet_wrap("Role") + theme(legend.position="none") + scale_x_discrete("Patch", labels = 1:24) + ylab('')
patched <- (plot1a & plot1b) / (plot2a & plot2b)
patched + plot_annotation(
title = "Time Series plotting for Winrate and PickBanrate",
subtitle = "With a special appearance by Heimerdinger!",
caption = "Lines that dissapear and reappearindicate that the chapmion did not have a high enough play rate to be considered for that role."
)
Is this useful?????? What do you gain from it??
#Covariation of PBR for champions.
Spread_Data <- leaguedf %>%
pivot_wider(id_cols = c('Name', 'Role'), names_from='Patch', values_from='PickBanRate') %>%
mutate(ID = paste(Name, Role, sep='.')) %>%
select(-c(1,2)) %>%
na.omit()
NameList = Spread_Data$ID
Spread_Data <- as_tibble(t(Spread_Data)) %>%
filter(row_number() <= n() - 1) %>%
mutate_if(is.character, as.numeric)
colnames(Spread_Data) <- NameList
CorrMat <- as_tibble(cor(Spread_Data))
rownames(CorrMat) <- colnames(CorrMat)
df <- CorrMat %>% rownames_to_column(var = "Champion1") %>% select(Senna.SUPPORT, `Tahm Kench.SUPPORT`, Ashe.ADC, Champion1) %>%
gather(key = "Champion", value = "Correlation", -Champion1) %>%
filter(Correlation < 1) %>%
group_by(Champion) %>%
arrange(Correlation) %>%
mutate(label = case_when(
row_number() <= 1 ~ str_to_title(str_replace(Champion1, '\\.', ' ')),
row_number() > n() - 1 ~ str_to_title(str_replace(Champion1, '\\.', ' ')), # This adds a Space into the name where the . is and uncapitalizes the second role
Champion == "Tahm Kench.SUPPORT" & Correlation > 0.68 ~ "Senna Support", # This is an outlier so labeling is justified, especially since it helps show the part of the plot
TRUE ~ as.character(NA)
))
ggplot(data = df, mapping = aes(x=Champion, y = Correlation)) +
geom_boxplot() +
ggtitle("PBR Correlation Boxplot")+
scale_x_discrete(labels = c("Ashe ADC", "Senna Support", "Tahm Kench Support")) +
labs(x = "", y = "Pick Ban Rate Correlation Coefficient", caption = "Minimum and Maximum corelation coefficients are annotated, as well as Senna Support for Tahm Kench Support in order\n to best visualize how the strength of certain counters, replacements, and synergies effect Pick Ban Rate.") +
geom_text(aes(label = label), na.rm = TRUE, hjust = -0.1, size = 3)
leaguedf %>%
select("Name", "PickBanRate", "WinRate", "Role", "RoleRate", "Class", "Patch") %>%
filter(!(Class == "NULL")) %>%
group_by(Role) %>%
group_map( ~ plot_ly(data = .,
x = ~ PickBanRate,
y = ~ WinRate,
color = ~ Class,
text = ~ Name,
frame = ~ Patch,
hoverinfo = "text",
type = "scatter",
mode = "markers",
marker = list(size = ~ RoleRate*5)
), .keep = TRUE) %>%
subplot(nrows = 2, shareX = TRUE, shareY=TRUE, margin=0.03) %>%
layout(showlegend = FALSE, title = 'Pick Ban Rate vs. Win Rate by Patch seperated by Role',
plot_bgcolor='#e5ecf6',
xaxis = list(
zerolinecolor = '#ffff',
zerolinewidth = 2,
gridcolor = 'ffff'),
yaxis = list(
zerolinecolor = '#ffff',
zerolinewidth = 2,
gridcolor = 'ffff'),
margin = 0.07) %>%
layout(annotations = annotations)
corr_data <- leaguedf %>%
select(c("WinRate", "Role", "KDA")) %>%
pivot_wider(names_from = Role,
values_from = Role,
values_fn = function(x) TRUE,
values_fill = FALSE)
model <- nls(WinRate ~ a*KDA + b*TOP*KDA^2 + c*MID*KDA^2 + d*JUNGLE*KDA^2 + e*SUPPORT*KDA^2 + f*ADC*KDA^2 + g, data = corr_data, start = list(a = 0.01, b = 0.01, c = 0.01, d = 0.01, e = 0.01, f = 0.01, g = 0.4))
predict_wr <- function(kda, top, mid, jungle, support, adc) {
predict(model, newdata = data.frame(KDA = kda, TOP = top, MID = mid, JUNGLE = jungle, SUPPORT = support, ADC = adc))
}
train_control <- trainControl(method = "repeatedcv", number = 25, repeats = 5)
nls1 <- train(WinRate ~ predict_wr(KDA, TOP, MID, JUNGLE, SUPPORT, ADC), data = corr_data,
method = "lm",
trControl = train_control,
preProcess = c("center", "scale"))
summary(nls1)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.096415 -0.009614 0.000986 0.011024 0.059827
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.503830 0.000225 2239.54
## `predict_wr(KDA, TOP, MID, JUNGLE, SUPPORT, ADC)` 0.006455 0.000225 28.69
## Pr(>|t|)
## (Intercept) <2e-16 ***
## `predict_wr(KDA, TOP, MID, JUNGLE, SUPPORT, ADC)` <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01659 on 5435 degrees of freedom
## Multiple R-squared: 0.1315, Adjusted R-squared: 0.1314
## F-statistic: 823.2 on 1 and 5435 DF, p-value: < 2.2e-16
summary(model)
##
## Formula: WinRate ~ a * KDA + b * TOP * KDA^2 + c * MID * KDA^2 + d * JUNGLE *
## KDA^2 + e * SUPPORT * KDA^2 + f * ADC * KDA^2 + g
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 0.0165073 0.0014125 11.687 < 2e-16 ***
## b 0.0011164 0.0002740 4.075 4.66e-05 ***
## c 0.0003812 0.0002622 1.454 0.14608
## d -0.0009877 0.0002625 -3.762 0.00017 ***
## e -0.0012356 0.0002554 -4.838 1.35e-06 ***
## f 0.0009030 0.0002786 3.241 0.00120 **
## g 0.4662019 0.0021234 219.556 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0166 on 5430 degrees of freedom
##
## Number of iterations to convergence: 1
## Achieved convergence tolerance: 3.226e-08
MidGrid <- expand.grid(KDA = seq(0,5, length.out = 501), MID = c(TRUE), TOP = FALSE, ADC = FALSE, SUPPORT = FALSE, JUNGLE = FALSE)
MidGrid$WinRate <- predict(nls1, MidGrid)
JungleGrid <- expand.grid(KDA = seq(0,5, length.out = 501), MID = FALSE, TOP = FALSE, ADC = FALSE, SUPPORT = FALSE, JUNGLE = TRUE)
JungleGrid$WinRate <- predict(nls1, JungleGrid)
TopGrid <- expand.grid(KDA = seq(0,5, length.out = 501), MID = FALSE, TOP = T, ADC = FALSE, SUPPORT = FALSE, JUNGLE = FALSE)
TopGrid$WinRate <- predict(nls1, TopGrid)
ADCGrid <- expand.grid(KDA = seq(0,5, length.out = 501), MID = F, TOP = FALSE, ADC = T, SUPPORT = FALSE, JUNGLE = FALSE)
ADCGrid$WinRate <- predict(nls1, ADCGrid)
SupportGrid <- expand.grid(KDA = seq(0,5, length.out = 501), MID = F, TOP = FALSE, ADC = FALSE, SUPPORT = T, JUNGLE = FALSE)
SupportGrid$WinRate <- predict(nls1, SupportGrid)
plot1a <- corr_data %>% filter(MID == TRUE) %>%
ggplot(mapping = aes(x = KDA, y = WinRate)) +
geom_point(alpha = 0.1) +
geom_line(data = MidGrid, mapping = aes(x = KDA, y = WinRate), color = "blue") +
ggtitle("Mid")
plot1b <- corr_data %>% filter(ADC == TRUE) %>%
ggplot(mapping = aes(x = KDA, y = WinRate)) +
geom_point(alpha = 0.1) +
geom_line(data = ADCGrid, mapping = aes(x = KDA, y = WinRate), color = "red")+
ggtitle("ADC")
plot1c <- corr_data %>% filter(SUPPORT == TRUE) %>%
ggplot(mapping = aes(x = KDA, y = WinRate)) +
geom_point(alpha = 0.1) +
geom_line(data = SupportGrid, mapping = aes(x = KDA, y = WinRate), color = "green") +
ggtitle("Support")
plot2a <- corr_data %>% filter(JUNGLE== TRUE) %>%
ggplot(mapping = aes(x = KDA, y = WinRate)) +
geom_point(alpha = 0.1) +
geom_line(data = JungleGrid, mapping = aes(x = KDA, y = WinRate), color = "purple") +
ggtitle("Jungle")
plot2b <- corr_data %>% filter(TOP == TRUE) %>%
ggplot(mapping = aes(x = KDA, y = WinRate)) +
geom_point(alpha = 0.1) +
geom_line(data = TopGrid, mapping = aes(x = KDA, y = WinRate), color = "orange") +
ggtitle("Top")
pw <- (plot1a & plot1b) / (plot1c) / (plot2a & plot2b)
pw + plot_annotation(
title = "Quadratic Regression of KDA and Winrate",
subtitle = "Seperated by Role",
caption = "Support has a strong negative coefficient for the 2nd degree, emphasizing that support sees lower payoffs than other roles for high KDA."
)